Code
# Attach the packages used throughout the analysis (pacman::p_load() also
# installs any that are missing).
pacman::p_load(tidyverse, bibliometrix, janitor, stringi, summarytools)

# Scopus advanced-search query (presumably the query behind the exported
# "250218_ScopusSearch.csv" -- confirm):
#
# TITLE-ABS-KEY ( ( "malnutrition" OR "undernutrition" OR
#   "nutritional deficiency" OR "protein-energy malnutrition" OR
#   "nutritional status" ) AND ( "elderly" OR "older adults" OR
#   "aging population" OR "geriatric" OR "frail elderly" ) AND
#   ( "nutrition intervention" OR "dietary intake" OR "sarcopenia" OR
#   "frailty" OR "gut microbiota" OR "muscle loss" OR "health outcomes" ) )
#   AND PUBYEAR > 1994 AND PUBYEAR < 2025
#   AND ( LIMIT-TO ( DOCTYPE , "ar" ) )
#   AND ( LIMIT-TO ( SRCTYPE , "j" ) )
#   AND ( LIMIT-TO ( PUBSTAGE , "final" ) )
# Import the Scopus CSV export as a bibliometrix data frame and keep a single
# record per title (TI). `.keep_all = TRUE` retains every other column of the
# first record found for each title.
eldmln_ds <- convert2df(
  "250218_ScopusSearch.csv",
  dbsource = "scopus",
  format = "csv"
) %>%
  distinct(TI, .keep_all = TRUE)

# Preview the first rows of the imported data.
eldmln_ds %>%
  head()
# Cache the imported data on disk and read it back (presumably so that later
# sessions can start from the .rds file instead of re-converting the CSV).
write_rds(eldmln_ds, "eldmln_ds.rds")
eldmln_ds <- read_rds("eldmln_ds.rds")

# Preview the first rows of the reloaded data.
eldmln_ds %>%
  head()

# List the column names of the data set; the printed result is kept below as
# commented output.
eldmln_ds %>%
  names()
#> [1] "AU" "AF"
#> [3] "Author.s..ID" "TI"
#> [5] "PY" "SO"
#> [7] "VL" "IS"
#> [9] "Art..No." "Page.start"
#> [11] "Page.end" "PP"
#> [13] "TC" "DI"
#> [15] "URL" "Affiliations"
#> [17] "C1" "AB"
#> [19] "DE" "ID"
#> [21] "Molecular.Sequence.Numbers" "Chemicals.CAS"
#> [23] "Tradenames" "Manufacturers"
#> [25] "FU" "FX"
#> [27] "CR" "RP"
#> [29] "Editors" "PU"
#> [31] "Sponsors" "Conference.name"
#> [33] "Conference.date" "Conference.location"
#> [35] "Conference.code" "ISSN"
#> [37] "ISBN" "CODEN"
#> [39] "PubMed.ID" "LA"
#> [41] "JI" "DT"
#> [43] "Publication.Stage" "OA"
#> [45] "DB" "UT"
#> [47] "C1raw" "J9"
#> [49] "AU_UN" "AU1_UN"
#> [51] "AU_UN_NR" "SR_FULL"
#> [53] "SR"
Key Column Names
Duplicate Title
# Titles that occur in more than one record.
# NOTE(review): eldmln_ds is already reduced with distinct(TI, ...) when it is
# imported, so as long as that step is in place this vector is expected to be
# empty; the check only becomes informative if it runs on the un-deduplicated
# import.
dup_ti <- eldmln_ds %>%
  count(TI) %>%
  filter(n > 1) %>%
  pull(TI)

# Show every record carrying one of those titles, sorted so that records with
# the same title sit next to each other.
eldmln_ds %>%
  filter(TI %in% dup_ti) %>%
  arrange(TI)

# Author ----
# Single-authored documents: AF separates authors with ";", so the author
# count is the number of separators plus one.
eldmln_ds %>%
  mutate(author_count = stri_count_fixed(AF, ";") + 1) %>%
  filter(author_count == 1)

# Split the AF field into one row per distinct (author name, Scopus ID) pair.
#
# Assumes each AF entry looks like "NAME (SCOPUS_ID)" -- TODO confirm against
# the export. The ID is taken from the trailing parenthesised group only, so a
# parenthesis inside the name itself is not mistaken for the ID.
#
# @param data A data frame with the columns TI and AF (authors separated by
#   ";").
# @return A tibble/data frame with the columns au_name and au_scid, one row
#   per distinct pair. au_scid is NA (a real missing value, not the string
#   "NA") when an entry carries no ID.
extract_author_ids <- function(data) {
  data %>%
    select(TI, AF) %>%
    # One row per author, however many authors a document has (a fixed
    # number of "af1..af10" columns would silently drop the 11th onwards).
    separate_longer_delim(AF, delim = ";") %>%
    mutate(au_nameid = str_trim(AF)) %>%
    filter(!is.na(au_nameid), au_nameid != "") %>%
    mutate(
      au_scid = str_extract(au_nameid, "(?<=\\()[^()]+(?=\\)$)"),
      au_name = str_trim(str_remove(au_nameid, "\\([^()]*\\)$"))
    ) %>%
    distinct(au_name, au_scid)
}

author_ids <- extract_author_ids(eldmln_ds)

# Author names attached to more than one Scopus ID. A missing ID counts as a
# distinct value here, so a name seen both with and without an ID is listed.
author_ids %>%
  group_by(au_name) %>%
  filter(n_distinct(au_scid) > 1) %>%
  arrange(au_name, au_scid) %>%
  ungroup()

# Scopus IDs attached to more than one spelling of the author name. Entries
# without an ID are excluded: they do not share an identifier and would
# otherwise all be reported as one ID with many names.
author_ids %>%
  filter(!is.na(au_scid)) %>%
  group_by(au_scid) %>%
  filter(n_distinct(au_name) > 1) %>%
  arrange(au_scid, au_name) %>%
  ungroup()

# Year ----
# Number of documents per publication year (PY).
eldmln_ds %>%
  count(PY)

# Document Type ----
# Number of documents per document type (DT).
eldmln_ds %>%
  count(DT)

# Run the bibliometrix descriptive analysis, using ";" as the separator for
# multi-valued fields, then cache the result on disk and read it back.
eldmln_bibres <- biblioAnalysis(eldmln_ds, sep = ";")
write_rds(eldmln_bibres, "eldmln_bibres.rds")
eldmln_bibres <- read_rds("eldmln_bibres.rds")

# Print the summary tables of the analysis (output follows).
summary(eldmln_bibres)
MAIN INFORMATION ABOUT DATA
Timespan 1995 : 2024
Sources (Journals, Books, etc) 1144
Documents 4567
Annual Growth Rate % 13.31
Document Average Age 7.33
Average citations per doc 31.76
Average citations per year per doc 3.318
References 161427
DOCUMENT TYPES
article 4567
DOCUMENT CONTENTS
Keywords Plus (ID) 11981
Author's Keywords (DE) 6100
AUTHORS
Authors 22474
Author Appearances 33176
Authors of single-authored docs 157
AUTHORS COLLABORATION
Single-authored docs 171
Documents per Author 0.203
Co-Authors per Doc 7.26
International co-authorships % 16.68
Annual Scientific Production
Year Articles
1995 15
1996 20
1997 19
1998 29
1999 28
2000 36
2001 42
2002 32
2003 39
2004 45
2005 36
2006 47
2007 56
2008 57
2009 46
2010 51
2011 61
2012 74
2013 123
2014 146
2015 178
2016 163
2017 241
2018 290
2019 327
2020 411
2021 484
2022 451
2023 458
2024 562
Annual Percentage Growth Rate 13.31
Most Productive Authors
Authors Articles Authors Articles Fractionalized
1 MAEDA K 40 MAEDA K 7.63
2 VELLAS B 37 WAKABAYASHI H 7.34
3 VOLKERT D 35 VOLKERT D 7.19
4 ZHANG Y 35 VELLAS B 6.36
5 WAKABAYASHI H 34 DE GROOT LCPGM 5.40
6 LI Y 32 ZHANG Y 4.88
7 CEDERHOLM T 29 LI Y 4.80
8 CESARI M 29 VAN STAVEREN WA 4.79
9 ZHANG X 27 CHEN L-K 4.60
10 CHEN L-K 26 WON CW 4.40
Top manuscripts per citations
Paper DOI TC TCperYear NTC
1 LIM SS, 2012, LANCET 10.1016/S0140-6736(12)61766-8 9581 684.4 30.94
2 VOS T, 2012, LANCET 10.1016/S0140-6736(12)61729-2 6480 462.9 20.93
3 CLAESSON MJ, 2012, NATURE 10.1038/nature11319 2555 182.5 8.25
4 FIELDING RA, 2011, J AM MED DIR ASSOC 10.1016/j.jamda.2011.01.003 2461 164.1 25.92
5 KWEON S, 2014, INT J EPIDEMIOL 10.1093/ije/dyt228 1653 137.8 22.20
6 FOREMAN KJ, 2018, LANCET 10.1016/S0140-6736(18)31694-5 1620 202.5 47.67
7 CRUZ-JENTOFT AJ, 2014, AGE AGEING 10.1093/ageing/afu115 1447 120.6 19.44
8 MUSCARITOLI M, 2010, CLIN NUTR 10.1016/j.clnu.2009.12.004 1333 83.3 20.25
9 DEUTZ NEP, 2014, CLIN NUTR 10.1016/j.clnu.2014.04.007 1151 95.9 15.46
10 KELAIDITI E, 2013, J NUTR HEALTH AGING 10.1007/s12603-013-0367-2 757 58.2 16.25
Corresponding Author's Countries
Country Articles Freq SCP MCP MCP_Ratio
1 CHINA 471 0.1189 431 40 0.0849
2 JAPAN 440 0.1111 420 20 0.0455
3 USA 322 0.0813 281 41 0.1273
4 SPAIN 240 0.0606 205 35 0.1458
5 ITALY 224 0.0566 164 60 0.2679
6 UNITED KINGDOM 183 0.0462 133 50 0.2732
7 AUSTRALIA 176 0.0444 140 36 0.2045
8 FRANCE 172 0.0434 144 28 0.1628
9 NETHERLANDS 172 0.0434 125 47 0.2733
10 KOREA 150 0.0379 139 11 0.0733
SCP: Single Country Publications
MCP: Multiple Country Publications
Total Citations per Country
Country Total Citations Average Article Citations
1 USA 36045 111.94
2 JAPAN 9649 21.93
3 ITALY 7665 34.22
4 CHINA 6776 14.39
5 UNITED KINGDOM 6096 33.31
6 NETHERLANDS 6058 35.22
7 SPAIN 5228 21.78
8 FRANCE 5079 29.53
9 AUSTRALIA 4904 27.86
10 KOREA 4662 31.08
Most Relevant Sources
Sources Articles
1 NUTRIENTS 303
2 JOURNAL OF NUTRITION HEALTH AND AGING 241
3 CLINICAL NUTRITION 113
4 BMC GERIATRICS 98
5 CLINICAL NUTRITION ESPEN 79
6 GERIATRICS AND GERONTOLOGY INTERNATIONAL 67
7 AGING CLINICAL AND EXPERIMENTAL RESEARCH 64
8 ARCHIVES OF GERONTOLOGY AND GERIATRICS 64
9 EUROPEAN JOURNAL OF CLINICAL NUTRITION 63
10 INTERNATIONAL JOURNAL OF ENVIRONMENTAL RESEARCH AND PUBLIC HEALTH 62
Most Relevant Keywords
Author Keywords (DE) Articles Keywords-Plus (ID) Articles
1 FRAILTY 914 AGED 7019
2 MALNUTRITION 790 FEMALE 5932
3 SARCOPENIA 779 MALE 5777
4 ELDERLY 674 NUTRITIONAL STATUS 4413
5 OLDER ADULTS 408 HUMAN 4185
6 NUTRITION 386 ARTICLE 3595
7 NUTRITIONAL STATUS 350 HUMANS 3408
8 AGING 224 MALNUTRITION 3176
9 AGED 206 FRAILTY 2363
10 MORTALITY 176 GERIATRIC ASSESSMENT 2337
# Publication counts per ten-year period, followed by a grand-total row.
# Years outside 1995-2024 (or a missing PY) match no case_when() branch and
# end up in an NA period, which is still included in the total.
eldmln_ds %>%
  count(PY) %>%
  mutate(
    Gap = case_when(
      PY %in% 1995:2004 ~ "1995-2004",
      PY %in% 2005:2014 ~ "2005-2014",
      PY %in% 2015:2024 ~ "2015-2024"
    )
  ) %>%
  group_by(Gap) %>%
  summarise(n = sum(n), .groups = "drop") %>%
  # `.` is the per-period table: append one extra row holding its column sum.
  bind_rows(
    .,
    summarise(., Gap = "1995-2024 (total)", n = sum(n))
  )

# 30 years ----
# Geometric mean of the year-over-year growth rates of the annual publication
# counts, in percent.
#
# For each year after the first, AGR = (n - n_prev) / n_prev * 100; the result
# is the geometric mean of the growth factors (1 + AGR / 100), converted back
# to a percentage.
#
# @param data A data frame with a publication-year column PY, one row per
#   document.
# @param years Optional vector of years; when given, only these years enter
#   the computation (growth is measured between consecutive retained years).
#   NULL (the default) uses every year present in `data`.
# @return A single number: the mean annual growth rate in percent.
geom_mean_agr <- function(data, years = NULL) {
  yearly <- data %>%
    # A missing year would otherwise form its own row after the last year and
    # contribute a meaningless "growth" step.
    filter(!is.na(PY)) %>%
    count(PY)

  if (!is.null(years)) {
    yearly <- filter(yearly, PY %in% years)
  }

  yearly %>%
    mutate(AGR = (n - lag(n)) / lag(n) * 100) %>%
    # The first year has no predecessor, hence no growth rate.
    filter(!is.na(AGR)) %>%
    summarise(geom_mean_agr = (exp(mean(log(1 + AGR / 100))) - 1) * 100) %>%
    pull(geom_mean_agr)
}

# Whole period, 1995 - 2024
gm_agr_9524 <- geom_mean_agr(eldmln_ds)
gm_agr_9524
#> [1] 13.3088

# 1995 - 2004 ----
gm_agr_9504 <- geom_mean_agr(eldmln_ds, years = 1995:2004)
gm_agr_9504
#> [1] 12.9831

# 2005 - 2014 ----
gm_agr_0514 <- geom_mean_agr(eldmln_ds, years = 2005:2014)
gm_agr_0514
#> [1] 16.83182

# 2015 - 2024 ----
gm_agr_1524 <- geom_mean_agr(eldmln_ds, years = 2015:2024)
gm_agr_1524
#> [1] 13.62649
# Colour reference:
#   #4682B4 steelblue
#   #CD5C5C indianred
#   #2E8B57 seagreen

# Bar chart of the annual publication counts, with dashed dividers between the
# three ten-year periods and the growth rate of each period (and of the whole
# span) printed as text.
eldmln_ds %>%
  count(PY) %>%
  ggplot(aes(x = PY, y = n)) +
  # Dividers sit between the last bar of one period and the first bar of the
  # next (x = 2004.5 / 2014.5) rather than on the centre of the 2004 / 2014
  # bars.
  geom_vline(
    xintercept = c(2004.5, 2014.5),
    linetype = "dashed",
    color = "#CD5C5C" # indianred
  ) +
  geom_col(fill = "#4682B4", color = "black") + # steelblue
  # One label per period; x, y and label are matched by position.
  annotate(
    "text",
    x = c(2006, 1996, 2006, 2016),
    y = c(600, 100, 200, 550),
    label = sprintf(
      "%s AGR = %.1f%%",
      c("1995-2024", "1995-2004", "2005-2014", "2015-2024"),
      c(gm_agr_9524, gm_agr_9504, gm_agr_0514, gm_agr_1524)
    ),
    color = "black",
    hjust = 0,
    size = 3
  ) +
  scale_x_continuous(breaks = seq(1989, 2029, 5)) +
  scale_y_continuous(breaks = seq(0, 600, 100)) +
  coord_cartesian(ylim = c(0, 620)) +
  labs(
    title = "Annual Publication Count",
    x = "Publication Year",
    y = "Number of Publications"
  ) +
  theme_bw()

# Number of distinct journals (SO).
eldmln_ds %>%
  summarise(total_unique_journals = n_distinct(SO))

# Ten journals with the most documents over the whole period. slice_max()
# keeps ties at the cut-off, so more than ten rows can be returned.
eldmln_ds %>%
  count(SO, sort = TRUE) %>%
  slice_max(n, n = 10)

# Journals with the most documents among the given publication years.
#
# @param data A data frame with the columns PY and SO, one row per document.
# @param years Vector of publication years to keep.
# @param n_top Number of journals to return (ties at the cut-off are kept).
# @return A data frame with the columns SO and n, most frequent first.
top_sources <- function(data, years, n_top = 5) {
  data %>%
    filter(PY %in% years) %>%
    count(SO, sort = TRUE) %>%
    slice_max(n, n = n_top)
}

# Top five journals per ten-year period.
top_sources(eldmln_ds, 1995:2004)
top_sources(eldmln_ds, 2005:2014)
top_sources(eldmln_ds, 2015:2024)

library(stringdist)
# Create a list of unique keywords
keywords <- unique(debyti$de_aukw)

# Compute pairwise Levenshtein distances (rows and columns both follow the
# order of `keywords`)
keyword_dist <- stringdistmatrix(keywords, keywords, method = "lv")

# Keep only the near-identical pairs (distance <= 2), reading their positions
# straight from the matrix instead of materialising every combination in a
# data frame first. The diagonal (a keyword paired with itself) is dropped;
# each pair still appears in both directions, in the same column-major order
# as before. keyword1 / keyword2 are plain character columns.
close_idx <- which(keyword_dist <= 2, arr.ind = TRUE)
close_idx <- close_idx[close_idx[, "row"] != close_idx[, "col"], , drop = FALSE]

keyword_pairs <- tibble(
  keyword1 = keywords[close_idx[, "row"]],
  keyword2 = keywords[close_idx[, "col"]],
  distance = keyword_dist[close_idx]
)

keyword_pairs

# Number of close neighbours found for each keyword
keyword_pairs %>%
  count(keyword2)

library(quanteda)
# NOTE(review): dist(), hclust() and cutree() used below are base R (stats);
# nothing in this section appears to need `cluster` -- confirm before removing.
library(cluster)
library(dplyr)

# Create a document-feature matrix (DFM) with tokenized keywords.
# Every element of debyti$de_aukw is treated as its own document.
dfm_keywords <- dfm(tokens(debyti$de_aukw), tolower = TRUE)

# Convert DFM to a TF-IDF matrix.
# NOTE(review): this object shares its name with the quanteda function
# dfm_tfidf(); calls still resolve to the function, but the name is confusing.
dfm_tfidf <- dfm_tfidf(dfm_keywords) # Correct function for TF-IDF transformation

# Convert to a dense matrix for clustering
tfidf_matrix <- convert(dfm_tfidf, to = "matrix")

# Compute distance matrix (dist() default: Euclidean).
# NOTE(review): one row per element of debyti$de_aukw, so memory and time grow
# quadratically with the number of rows of debyti.
dist_matrix <- dist(tfidf_matrix)

# Apply hierarchical clustering (hclust() default: complete linkage)
keyword_clusters <- hclust(dist_matrix)

# Cut tree into 50 clusters (adjust as needed).
# This adds a keyword_cluster column to debyti itself.
debyti$keyword_cluster <- cutree(keyword_clusters, k = 50)

# Assign a standard keyword per cluster.
# NOTE(review): the "standard" keyword is simply the first one in row order
# within each cluster, not the most frequent one -- confirm this is intended.
cluster_mapping <- debyti %>%
  group_by(keyword_cluster) %>%
  summarise(std_keyword = first(de_aukw), .groups = "drop")

# Merge standard keywords back into debyti: every keyword is overwritten by
# its cluster's standard keyword (the original value is kept only where the
# standard keyword is NA), then the helper columns are dropped.
debyti2 <- debyti %>%
  left_join(cluster_mapping, by = "keyword_cluster") %>%
  mutate(de_aukw = coalesce(std_keyword, de_aukw)) %>%
  select(-std_keyword, -keyword_cluster)

# View updated dataset
head(debyti2)